poly <- readRDS(paste0(clean_path, "population_poly.rds"))
poly <- poly %>% select(research_case_id, bp, hr, temperature, inr, iss, iss_cat,
invasive, gcs, gcs_cat, sex, age, age_cat, bleeding, fracture,
concussion, brain_edema, brain_compression, unconsciousness)
# What to do with NA's?
# For heart rate and blood pressure must be documentation mistakes (always measured), so I exclude them in the analysis
poly <- poly %>% drop_na(bp, hr)
# Also, a HR of 865 seems hardly normal, take it out
poly <- poly %>% filter(hr < 250)
# For temp and inr missing probably indicates no problem and thus not measured. I will try to fit a model where I remove the NA's and another model where I convert the variables to ordered factors and classify NA's as in normal range (ASSUMPTION!).
# Create long-format dataset for BP and HR (hemorrhage)
poly_hemo <- melt(poly,
id.vars = c("research_case_id", "gcs_cat", "iss_cat", "invasive",
"sex", "age_cat", "bleeding", "fracture", "concussion",
"brain_edema", "brain_compression", "unconsciousness"),
measure.vars = c("bp", "hr"),
variable.name = "vital_sign",
value.name = "value")
# Create combined GCS and vital sign variable
poly_hemo[, gcs_vital := paste(gcs_cat, vital_sign, sep = "_")]
# Distribution of ordered factors
table(poly$gcs_cat)
##
## 0.unknown 1.mild 2.moderate 3.severe
## 1530 4801 215 529
table(poly$iss_cat)
##
## 1.1-8 2.9-15 3.16-24 4.25+
## 3665 2330 891 189
table(poly$age_cat)
##
## 1.<30 2.30-39 3.40-49 4.50-59 5.60-69 6.70-78 7.79+
## 1196 956 843 935 808 866 1471
# Summary statistics of key variables
summary(poly[, .(bp, hr, temperature, inr)])
## bp hr temperature inr
## Min. : 0.00 Min. : 0.00 Min. :23.10 Min. : 0.80
## 1st Qu.: 85.00 1st Qu.: 70.00 1st Qu.:36.40 1st Qu.: 1.10
## Median : 95.00 Median : 80.00 Median :36.70 Median : 1.10
## Mean : 95.81 Mean : 81.21 Mean :36.67 Mean : 1.22
## 3rd Qu.:106.00 3rd Qu.: 90.00 3rd Qu.:37.10 3rd Qu.: 1.20
## Max. :185.67 Max. :183.00 Max. :43.90 Max. :10.00
## NA's :618 NA's :380
# Check missing values in key variables
missing_values <- colSums(is.na(poly))
print(missing_values)
## research_case_id bp hr temperature
## 0 0 0 618
## inr iss iss_cat invasive
## 380 0 0 0
## gcs gcs_cat sex age
## 1530 0 0 0
## age_cat bleeding fracture concussion
## 0 0 0 0
## brain_edema brain_compression unconsciousness
## 0 0 0
# Distribution of key outcome variables
par(mfrow=c(2,2))
hist(poly$bp, main="Blood Pressure Distribution", xlab="BP")
hist(poly$hr, main="Heart Rate Distribution", xlab="HR")
hist(poly$temperature, main="Temperature Distribution", xlab="Temperature")
hist(poly$inr, main="INR Distribution", xlab="INR")
par(mfrow=c(1,1))
# Examine relationships between ordered factors and outcomes
boxplot(bp ~ gcs_cat, data = poly,
main = "Blood Pressure by GCS Category", xlab = "GCS Category", ylab = "BP")
boxplot(hr ~ gcs_cat, data = poly,
main = "Heart Rate by GCS Category", xlab = "GCS Category", ylab = "HR")
boxplot(temperature ~ gcs_cat, data = poly,
main = "Temperature by GCS Category", xlab = "GCS Category", ylab = "Temperature")
boxplot(inr ~ gcs_cat, data = poly,
main = "INR by GCS Category", xlab = "GCS Category", ylab = "INR")
model1 <- lmer(value ~ vital_sign * gcs_cat + iss_cat + invasive + sex + age_cat +
bleeding + fracture + concussion + brain_edema + brain_compression +
unconsciousness + (1|research_case_id), data = poly_hemo)
# Alternative model with sex interaction
model1_sex <- lmer(value ~ vital_sign * gcs_cat + vital_sign * sex + iss_cat + invasive +
age_cat + bleeding + fracture + concussion + brain_edema +
brain_compression + unconsciousness + (1|research_case_id),
data = poly_hemo)
# Model summaries
summary(model1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: value ~ vital_sign * gcs_cat + iss_cat + invasive + sex + age_cat +
## bleeding + fracture + concussion + brain_edema + brain_compression +
## unconsciousness + (1 | research_case_id)
## Data: poly_hemo
##
## REML criterion at convergence: 119549.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.8052 -0.6345 -0.0586 0.5535 5.1260
##
## Random effects:
## Groups Name Variance Std.Dev.
## research_case_id (Intercept) 29.81 5.46
## Residual 245.89 15.68
## Number of obs: 14150, groups: research_case_id, 7075
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 9.297e+01 5.305e-01 1.041e+04 175.256 < 2e-16 ***
## vital_signhr -9.910e+00 4.770e-01 7.071e+03 -20.774 < 2e-16 ***
## gcs_cat.L -7.392e+00 6.831e-01 1.317e+04 -10.821 < 2e-16 ***
## gcs_cat.Q -3.756e+00 7.297e-01 1.383e+04 -5.148 2.67e-07 ***
## gcs_cat.C -6.839e-01 8.037e-01 1.392e+04 -0.851 0.3948
## iss_cat.L -5.756e-01 6.647e-01 7.054e+03 -0.866 0.3865
## iss_cat.Q -1.334e+00 5.357e-01 7.054e+03 -2.489 0.0128 *
## iss_cat.C 1.550e-01 3.884e-01 7.054e+03 0.399 0.6898
## invasive1 6.818e-01 7.635e-01 7.054e+03 0.893 0.3719
## sexm 7.219e-01 3.183e-01 7.054e+03 2.268 0.0233 *
## age_cat.L 3.341e+00 3.881e-01 7.054e+03 8.609 < 2e-16 ***
## age_cat.Q 9.456e-02 3.816e-01 7.054e+03 0.248 0.8043
## age_cat.C 5.400e-01 3.961e-01 7.054e+03 1.363 0.1728
## age_cat^4 5.121e-01 4.023e-01 7.054e+03 1.273 0.2031
## age_cat^5 8.605e-01 4.210e-01 7.054e+03 2.044 0.0410 *
## age_cat^6 -3.532e-01 4.177e-01 7.054e+03 -0.846 0.3978
## bleeding 2.505e-01 5.345e-01 7.054e+03 0.469 0.6394
## fracture -3.396e-01 4.958e-01 7.054e+03 -0.685 0.4934
## concussion 2.469e-01 4.006e-01 7.054e+03 0.616 0.5377
## brain_edema 2.827e+00 2.248e+00 7.054e+03 1.258 0.2085
## brain_compression 1.964e+00 2.983e+00 7.054e+03 0.658 0.5103
## unconsciousness -1.796e+00 4.120e-01 7.054e+03 -4.360 1.32e-05 ***
## vital_signhr:gcs_cat.L 1.598e+01 8.261e-01 7.071e+03 19.346 < 2e-16 ***
## vital_signhr:gcs_cat.Q 5.461e+00 9.540e-01 7.071e+03 5.724 1.08e-08 ***
## vital_signhr:gcs_cat.C 7.024e-01 1.067e+00 7.071e+03 0.658 0.5103
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model1_sex)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## value ~ vital_sign * gcs_cat + vital_sign * sex + iss_cat + invasive +
## age_cat + bleeding + fracture + concussion + brain_edema +
## brain_compression + unconsciousness + (1 | research_case_id)
## Data: poly_hemo
##
## REML criterion at convergence: 119548.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.8098 -0.6339 -0.0583 0.5553 5.1305
##
## Random effects:
## Groups Name Variance Std.Dev.
## research_case_id (Intercept) 29.8 5.459
## Residual 245.9 15.681
## Number of obs: 14150, groups: research_case_id, 7075
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 9.311e+01 5.571e-01 1.176e+04 167.142 < 2e-16 ***
## vital_signhr -1.019e+01 5.859e-01 7.070e+03 -17.398 < 2e-16 ***
## gcs_cat.L -7.370e+00 6.836e-01 1.318e+04 -10.782 < 2e-16 ***
## gcs_cat.Q -3.760e+00 7.297e-01 1.383e+04 -5.153 2.59e-07 ***
## gcs_cat.C -6.764e-01 8.038e-01 1.392e+04 -0.842 0.4000
## sexm 4.950e-01 4.189e-01 1.379e+04 1.182 0.2374
## iss_cat.L -5.756e-01 6.647e-01 7.054e+03 -0.866 0.3865
## iss_cat.Q -1.334e+00 5.357e-01 7.054e+03 -2.489 0.0128 *
## iss_cat.C 1.550e-01 3.884e-01 7.054e+03 0.399 0.6898
## invasive1 6.818e-01 7.635e-01 7.054e+03 0.893 0.3719
## age_cat.L 3.341e+00 3.881e-01 7.054e+03 8.609 < 2e-16 ***
## age_cat.Q 9.456e-02 3.816e-01 7.054e+03 0.248 0.8043
## age_cat.C 5.400e-01 3.961e-01 7.054e+03 1.363 0.1728
## age_cat^4 5.121e-01 4.023e-01 7.054e+03 1.273 0.2031
## age_cat^5 8.605e-01 4.210e-01 7.054e+03 2.044 0.0410 *
## age_cat^6 -3.532e-01 4.177e-01 7.054e+03 -0.846 0.3978
## bleeding 2.505e-01 5.345e-01 7.054e+03 0.469 0.6394
## fracture -3.396e-01 4.958e-01 7.054e+03 -0.685 0.4934
## concussion 2.469e-01 4.006e-01 7.054e+03 0.616 0.5377
## brain_edema 2.827e+00 2.248e+00 7.054e+03 1.258 0.2085
## brain_compression 1.964e+00 2.983e+00 7.054e+03 0.658 0.5103
## unconsciousness -1.796e+00 4.120e-01 7.054e+03 -4.360 1.32e-05 ***
## vital_signhr:gcs_cat.L 1.594e+01 8.277e-01 7.070e+03 19.256 < 2e-16 ***
## vital_signhr:gcs_cat.Q 5.469e+00 9.541e-01 7.070e+03 5.732 1.03e-08 ***
## vital_signhr:gcs_cat.C 6.875e-01 1.067e+00 7.070e+03 0.644 0.5193
## vital_signhr:sexm 4.538e-01 5.448e-01 7.070e+03 0.833 0.4048
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Compare models
anova(model1, model1_sex)
## Data: poly_hemo
## Models:
## model1: value ~ vital_sign * gcs_cat + iss_cat + invasive + sex + age_cat + bleeding + fracture + concussion + brain_edema + brain_compression + unconsciousness + (1 | research_case_id)
## model1_sex: value ~ vital_sign * gcs_cat + vital_sign * sex + iss_cat + invasive + age_cat + bleeding + fracture + concussion + brain_edema + brain_compression + unconsciousness + (1 | research_case_id)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## model1 27 119615 119819 -59780 119561
## model1_sex 28 119616 119828 -59780 119560 0.6944 1 0.4047
Note that following our discussion last time I also fit the model with an interaction with sex. However, neither the interaction the main effect were significant and it resulted in a higher AIC score. In the model without interaction, the main effect is significant, indicating higher bp and hr for men.
We can see a significant positive linear effect of age_cat.
Let’s look at some diagnostics plots
plot(model1) # Looks ok
qqnorm(resid(model1), main="Q-Q Plot of Residuals")
qqline(resid(model1))
# Half-normal plot of residuals (approximation using car package)
car::qqPlot(resid(model1), distribution="norm", envelope=0.95,
main="Half-Normal Plot of Residuals", ylab="Residuals")
## [1] 3489 4990
# Predictions
poly_hemo$predicted <- predict(model1, newdata = poly_hemo, allow.new.levels = TRUE, re.form = NULL) # Include random effects
# Plot predicted vs observed
ggplot(poly_hemo, aes(x=value, y=predicted)) +
geom_point(alpha=0.5) +
geom_abline(intercept=0, slope=1, color="red") +
facet_wrap(~vital_sign, scales="fixed") +
labs(title="Predicted vs Observed Values", x="Observed", y="Predicted") +
theme_minimal()
# Examine interaction effects with ordered factors
ggplot(poly_hemo, aes(x=gcs_cat, y=value, fill=vital_sign)) +
geom_boxplot() +
facet_wrap(~vital_sign, scales="free_y") +
labs(title="BP and HR by GCS Category", x="GCS Category", y="Value") +
theme_minimal()
The qq plot looks like there is a bit of skewness, we could try to fit it with a log trafo.
model1_log <- lmer(log(value + 0.0001) ~ vital_sign * gcs_cat + iss_cat + invasive + sex +
age_cat + bleeding + fracture + concussion + brain_edema +
brain_compression + unconsciousness + (1|research_case_id),
data = poly_hemo)
summary(model1_log)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: log(value + 1e-04) ~ vital_sign * gcs_cat + iss_cat + invasive +
## sex + age_cat + bleeding + fracture + concussion + brain_edema +
## brain_compression + unconsciousness + (1 | research_case_id)
## Data: poly_hemo
##
## REML criterion at convergence: 2414.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -44.258 -0.401 0.023 0.414 15.633
##
## Random effects:
## Groups Name Variance Std.Dev.
## research_case_id (Intercept) 0.03188 0.1786
## Residual 0.04380 0.2093
## Number of obs: 14150, groups: research_case_id, 7075
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 4.495e+00 9.443e-03 8.834e+03 475.960 < 2e-16 ***
## vital_signhr -1.218e-01 6.366e-03 7.071e+03 -19.133 < 2e-16 ***
## gcs_cat.L -1.047e-01 1.160e-02 1.085e+04 -9.024 < 2e-16 ***
## gcs_cat.Q -5.650e-02 1.216e-02 1.172e+04 -4.647 3.41e-06 ***
## gcs_cat.C -1.234e-02 1.334e-02 1.191e+04 -0.925 0.354787
## iss_cat.L -5.057e-02 1.247e-02 7.054e+03 -4.055 5.07e-05 ***
## iss_cat.Q -4.935e-02 1.005e-02 7.054e+03 -4.910 9.32e-07 ***
## iss_cat.C -1.096e-02 7.287e-03 7.054e+03 -1.504 0.132538
## invasive1 2.580e-02 1.433e-02 7.054e+03 1.801 0.071732 .
## sexm 4.892e-03 5.972e-03 7.054e+03 0.819 0.412767
## age_cat.L 3.846e-02 7.281e-03 7.054e+03 5.282 1.32e-07 ***
## age_cat.Q -5.247e-03 7.161e-03 7.054e+03 -0.733 0.463689
## age_cat.C 6.182e-03 7.433e-03 7.054e+03 0.832 0.405580
## age_cat^4 6.874e-03 7.549e-03 7.054e+03 0.911 0.362537
## age_cat^5 8.666e-03 7.900e-03 7.054e+03 1.097 0.272688
## age_cat^6 -2.900e-03 7.838e-03 7.054e+03 -0.370 0.711390
## bleeding -9.045e-03 1.003e-02 7.054e+03 -0.902 0.367156
## fracture -8.448e-04 9.303e-03 7.054e+03 -0.091 0.927648
## concussion 1.662e-03 7.516e-03 7.054e+03 0.221 0.824960
## brain_edema 4.868e-02 4.218e-02 7.054e+03 1.154 0.248452
## brain_compression 4.291e-02 5.598e-02 7.054e+03 0.767 0.443403
## unconsciousness -1.281e-02 7.730e-03 7.054e+03 -1.657 0.097498 .
## vital_signhr:gcs_cat.L 1.688e-01 1.103e-02 7.071e+03 15.314 < 2e-16 ***
## vital_signhr:gcs_cat.Q 4.796e-02 1.273e-02 7.071e+03 3.766 0.000167 ***
## vital_signhr:gcs_cat.C -7.866e-04 1.424e-02 7.071e+03 -0.055 0.955938
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(model1, model1_log)
## Data: poly_hemo
## Models:
## model1: value ~ vital_sign * gcs_cat + iss_cat + invasive + sex + age_cat + bleeding + fracture + concussion + brain_edema + brain_compression + unconsciousness + (1 | research_case_id)
## model1_log: log(value + 1e-04) ~ vital_sign * gcs_cat + iss_cat + invasive + sex + age_cat + bleeding + fracture + concussion + brain_edema + brain_compression + unconsciousness + (1 | research_case_id)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## model1 27 119615 119819 -59780 119561
## model1_log 27 2278 2482 -1112 2224 117337 0
# Some Diagnostics
plot(model1_log)
qqnorm(resid(model1_log), main="Q-Q Plot of Residuals")
qqline(resid(model1_log))
# Half-normal plot of residuals (approximation using car package)
car::qqPlot(resid(model1), distribution="norm", envelope=0.95,
main="Half-Normal Plot of Residuals", ylab="Residuals")
## [1] 3489 4990
# Predictions
poly_hemo$predicted_log <- predict(model1_log, newdata = poly_hemo, allow.new.levels = TRUE, re.form = NULL) # Include random effects
poly_hemo$predicted_log_trafo <- exp(poly_hemo$predicted_log) - 0.0001
# Plot predicted vs observed
ggplot(poly_hemo, aes(x=value, y=predicted_log_trafo)) +
geom_point(alpha=0.5) +
geom_abline(intercept=0, slope=1, color="red") +
facet_wrap(~vital_sign, scales="fixed") +
labs(title="Predicted vs Observed Values", x="Observed", y="Predicted") +
theme_minimal()
Changes in interpretation: * Sex & unconsciousness is not
significant anymore * ISS not only quadratic barely significant but both
linear and quadratic highly significant (negative), which is to be
expected
At least to me, qqplot and predictions look better with a log trafo!
However, we can see a few outliers. What worries me is that they did not show before the log trafo. We can have a look at them:
# TODO
# wrong ones before
Note that since we only have temperature (single outcome variable; continuous) and no multivariate case here, there’s no point in fitting a lmer() model, since it’s not sensible to have random effects for research_case_id (here, they are unique!).
poly_temp <- poly %>% drop_na(temperature)
qqnorm(poly_temp$temperature, main="Q-Q Plot for Temperature")
qqline(poly_temp$temperature)
model2 <- lm(temperature ~ gcs_cat + iss_cat + invasive + sex + age_cat +
bleeding + fracture + concussion + brain_edema + brain_compression +
unconsciousness, data = poly_temp)
summary(model2)
##
## Call:
## lm(formula = temperature ~ gcs_cat + iss_cat + invasive + sex +
## age_cat + bleeding + fracture + concussion + brain_edema +
## brain_compression + unconsciousness, data = poly_temp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.8229 -0.3107 0.0125 0.3595 8.0535
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 36.280295 0.028738 1262.469 < 2e-16 ***
## gcs_cat.L -0.705540 0.033036 -21.357 < 2e-16 ***
## gcs_cat.Q -0.212251 0.033377 -6.359 2.17e-10 ***
## gcs_cat.C -0.055648 0.036230 -1.536 0.124599
## iss_cat.L -0.339735 0.040948 -8.297 < 2e-16 ***
## iss_cat.Q -0.146951 0.032825 -4.477 7.71e-06 ***
## iss_cat.C -0.021197 0.023543 -0.900 0.367966
## invasive1 -0.012699 0.046213 -0.275 0.783492
## sexm 0.014717 0.018893 0.779 0.436018
## age_cat.L -0.089488 0.023037 -3.885 0.000104 ***
## age_cat.Q 0.018338 0.022703 0.808 0.419278
## age_cat.C 0.025538 0.023456 1.089 0.276306
## age_cat^4 -0.002723 0.023835 -0.114 0.909035
## age_cat^5 -0.003950 0.024935 -0.158 0.874144
## age_cat^6 -0.003952 0.024819 -0.159 0.873498
## bleeding 0.152813 0.031899 4.791 1.70e-06 ***
## fracture 0.010354 0.029658 0.349 0.727021
## concussion 0.067613 0.023877 2.832 0.004645 **
## brain_edema -0.278902 0.138117 -2.019 0.043496 *
## brain_compression -0.132814 0.191376 -0.694 0.487711
## unconsciousness 0.010731 0.024695 0.435 0.663914
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7001 on 6436 degrees of freedom
## Multiple R-squared: 0.1145, Adjusted R-squared: 0.1118
## F-statistic: 41.61 on 20 and 6436 DF, p-value: < 2.2e-16
# Figure out what to do with NA --> make one model with removed
# --> one model with categories
Interpretation:
Let’s look at some diagnostics:
par(mfrow=c(2,2))
plot(model2)
par(mfrow=c(1,1))
# Half-normal plot of residuals
car::qqPlot(resid(model2), distribution="norm", envelope=0.95,
main="Half-Normal Plot of Residuals", ylab="Residuals")
## [1] 1745 4149
# Predictions
poly_temp$temp_predicted <- predict(model2, poly_temp)
# Plot predicted vs observed
ggplot(poly_temp, aes(x=temperature, y=temp_predicted)) +
geom_point(alpha=0.5) +
geom_abline(intercept=0, slope=1, color="red") +
labs(title="Predicted vs Observed Temperature", x="Observed", y="Predicted") +
theme_minimal()
I don’t know, doesn’t look so good.
Just look at effects:
# Examine effects of ordered factors
ggplot(poly_temp, aes(x=gcs_cat, y=temperature)) +
geom_boxplot() +
labs(title="Temperature by GCS Category", x="GCS Category", y="Temperature") +
theme_minimal()
ggplot(poly_temp, aes(x=iss_cat, y=temperature)) +
geom_boxplot() +
labs(title="Temperature by ISS Category", x="ISS Category", y="Temperature") +
theme_minimal()
ggplot(poly_temp, aes(x=age_cat, y=temperature)) +
geom_boxplot() +
labs(title="Temperature by Age Category", x="Age Category", y="Temperature") +
theme_minimal()
Same thing as above; just use lm.
poly_inr <- poly %>% drop_na(inr)
model3 <- lm(inr ~ gcs_cat + iss_cat + invasive + sex + age_cat +
bleeding + fracture + concussion + brain_edema + brain_compression +
unconsciousness, data = poly_inr)
summary(model3)
##
## Call:
## lm(formula = inr ~ gcs_cat + iss_cat + invasive + sex + age_cat +
## bleeding + fracture + concussion + brain_edema + brain_compression +
## unconsciousness, data = poly_inr)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.6226 -0.1714 -0.0679 0.0201 8.4128
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.279481 0.018358 69.697 < 2e-16 ***
## gcs_cat.L 0.153019 0.021149 7.235 5.16e-13 ***
## gcs_cat.Q 0.020010 0.021501 0.931 0.352055
## gcs_cat.C 0.021486 0.023452 0.916 0.359597
## iss_cat.L 0.100529 0.025628 3.923 8.85e-05 ***
## iss_cat.Q 0.031382 0.020648 1.520 0.128599
## iss_cat.C -0.012946 0.015009 -0.863 0.388422
## invasive1 0.050266 0.029318 1.715 0.086480 .
## sexm 0.045608 0.012386 3.682 0.000233 ***
## age_cat.L 0.212063 0.015076 14.067 < 2e-16 ***
## age_cat.Q 0.126273 0.014819 8.521 < 2e-16 ***
## age_cat.C 0.008021 0.015371 0.522 0.601806
## age_cat^4 -0.007263 0.015584 -0.466 0.641181
## age_cat^5 -0.009699 0.016323 -0.594 0.552411
## age_cat^6 -0.002581 0.016188 -0.159 0.873308
## bleeding -0.024893 0.020607 -1.208 0.227108
## fracture -0.042573 0.019150 -2.223 0.026239 *
## concussion -0.038310 0.015502 -2.471 0.013488 *
## brain_edema 0.114685 0.088663 1.293 0.195888
## brain_compression 0.169688 0.115377 1.471 0.141413
## unconsciousness -0.035385 0.015939 -2.220 0.026449 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4664 on 6674 degrees of freedom
## Multiple R-squared: 0.06086, Adjusted R-squared: 0.05805
## F-statistic: 21.62 on 20 and 6674 DF, p-value: < 2.2e-16
Let’s look at some diagnostics:
par(mfrow=c(2,2))
plot(model3)
par(mfrow=c(1,1))
# Examine relationship between INR and ordered factors
ggplot(poly, aes(x=gcs_cat, y=inr)) +
geom_boxplot() +
labs(title="INR by GCS Category", x="GCS Category", y="INR") +
theme_minimal()
summary(poly$inr)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.80 1.10 1.10 1.22 1.20 10.00 380
boxplot(poly$inr)
boxplot(log(poly$inr))
Very bad. If we look at the last plots it’s no wonder, a lot of outliers or very high values distorting it. Maybe approach with ordered factors can help.
Try with a log trafo:
model3_log <- lm(log(inr + 0.0001) ~ gcs_cat + iss_cat + invasive + sex + age_cat +
bleeding + fracture + concussion + brain_edema + brain_compression +
unconsciousness, data = poly_inr)
summary(model3_log)
##
## Call:
## lm(formula = log(inr + 1e-04) ~ gcs_cat + iss_cat + invasive +
## sex + age_cat + bleeding + fracture + concussion + brain_edema +
## brain_compression + unconsciousness, data = poly_inr)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.40702 -0.11179 -0.03358 0.02994 2.02435
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.202474 0.008772 23.081 < 2e-16 ***
## gcs_cat.L 0.099329 0.010106 9.829 < 2e-16 ***
## gcs_cat.Q 0.011733 0.010274 1.142 0.253497
## gcs_cat.C 0.012231 0.011206 1.091 0.275125
## iss_cat.L 0.066232 0.012246 5.408 6.59e-08 ***
## iss_cat.Q 0.013092 0.009867 1.327 0.184618
## iss_cat.C -0.010313 0.007172 -1.438 0.150484
## invasive1 0.029004 0.014010 2.070 0.038466 *
## sexm 0.031779 0.005919 5.369 8.18e-08 ***
## age_cat.L 0.111466 0.007204 15.473 < 2e-16 ***
## age_cat.Q 0.076526 0.007081 10.807 < 2e-16 ***
## age_cat.C 0.005911 0.007345 0.805 0.420985
## age_cat^4 -0.002012 0.007447 -0.270 0.787051
## age_cat^5 -0.006708 0.007800 -0.860 0.389862
## age_cat^6 -0.003508 0.007736 -0.454 0.650184
## bleeding -0.023573 0.009847 -2.394 0.016701 *
## fracture -0.021178 0.009151 -2.314 0.020678 *
## concussion -0.026092 0.007408 -3.522 0.000431 ***
## brain_edema 0.094174 0.042368 2.223 0.026266 *
## brain_compression 0.089951 0.055133 1.632 0.102828
## unconsciousness -0.023403 0.007616 -3.073 0.002130 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2229 on 6674 degrees of freedom
## Multiple R-squared: 0.08771, Adjusted R-squared: 0.08498
## F-statistic: 32.08 on 20 and 6674 DF, p-value: < 2.2e-16
par(mfrow=c(2,2))
plot(model3_log)
par(mfrow=c(1,1))
Same result. Helps a bit, but not nearly enough.